Setup

library(tidyverse)
library(magrittr)
library(furrr)
library(gghighlight)
library(shinystan)
library(tidybayes)
library(ggridges)
library(rstatix)
library(effectsize)
library(cmdstanr)
plan(multisession)

# User-defined functions
source("../function/r/my_functions.R")

# Import data
# We modified a column name because we realized that
# "block" sounds more intuitive than "task" in description of the analysis.
df_gamble <- read_csv("../data/preprocessed/gamble.csv") %>%
  rename(block = task)
df_solo <- read_csv("../data/preprocessed/solo.csv") %>%
  rename(block = task)
df_group <- read_csv("../data/preprocessed/group.csv") %>%
  rename(block = task)
df_solo_group <- bind_rows(df_solo, df_group)
df_gabor_solo <- read_csv("../data/preprocessed/solo_gabor.csv") %>%
  rename(block = task)
df_gabor_group <- read_csv("../data/preprocessed/group_gabor.csv") %>%
  rename(block = task)
df_gabor <- bind_rows(df_gabor_solo, df_gabor_group)
df_questionnaire <- read_csv("../data/preprocessed/questionnaire.csv")

# Import the fitting result of the gambling task
median_powutil <- readRDS("../output/data/gamble/median_powutil.rds")


Task

After working on the gambling task, participants performed an orientation-averaging task (Navajas et al., 2017, Nature Human Behaviour; https://www.nature.com/articles/s41562-017-0215-1).

Task flow:

  1. A series of tilted 30 Gabor patches was presented. Variance of orientations (i.e., task difficulty) had four levels: 8, 16, 32, and 64 degrees.

  2. Participants judged whether the mean orientation of Gabor patches was clockwise or anticlockwise.

  3. Participants then chose a sure or risky option. The sure option guaranteed a certain but small reward, 500 JPY. The risky option yielded a larger reward (r JPY) if participants answered the orientation correctly; otherwise, participants received nothing.

    • r had 12 levels: {550, 590, 640, 700, 770, 850, 940, 1040, 1150, 1270, 1400, 1540}

    • r was randomly jittered every trial by adding a small amount randomly sampled from {-10, 0, 10}.

  4. Participants rated their confidence about the judgment of orientation on a 6-point scale (1: Not confident at all; 6: Very confident).

Participants worked on 96 trials of the task. Trial order was counterbalanced across participants.


Basic data

We first checked the data.

head(df_solo_group)


line_pos: Positions of the lines (ca: clockwise-anticlockwise; ac: anticlockwise-clockwise).

option_pos: Positions of the options (rs: risky-sure; sr: sure-risky).


Orientations of the stimuli are saved as follows:

head(df_gabor, n = 30)


No learning effect between blocks

We first checked individual accuracy in the solo and group blocks. In the following analysis, we excluded trials in the group block where participants chose whether to vote or not before the stimulus presentation (order == "pre"). remove_order_pre() is defined in ../function/r/my_functions.R.

As shown below, almost no difference in participants’ accuracies was observed between the blocks. The number at the top of each panel indicates the variance of orientations, and jittered points indicate individual data.

df_solo_group %>%
  remove_order_pre() %>%
  group_by(id, ori_var, block) %>%
  summarise(accuracy = mean(result == "correct")) %>%
  rename(Block = block) %>%
  ggplot(aes(accuracy, Block, color = Block, fill = Block)) +
  stat_halfeye(
    position = position_nudge(y = 0.2),
    height = 0.7, point_color = NA, .width = 0
  ) +
  geom_point(
    position = position_jitter(width = 0, height = 0.1, seed = 1),
    size = 0.5
  ) +
  scale_x_continuous(breaks = seq(0, 1, 0.2), limits = c(0.2, 1)) +
  scale_color_manual(values = c("black", "gray50")) +
  scale_fill_manual(values = c("black", "gray50")) +
  facet_wrap(vars(fct_rev(ori_var)), ncol = 4) +
  labs(x = "Individual accuracy", y = "Block") +
  guides(color = "none", fill = "none") +
  theme_facet +
  theme(panel.border = element_rect(color = "black"))


The figure shows mean accuracies \(\pm\) standard errors merged across participants.

df_solo_group %>%
  remove_order_pre() %>%
  group_by(id, ori_var, block) %>%
  summarise(ind_mean = mean(result == "correct")) %>%
  group_by(ori_var, block) %>%
  summarise(mean = mean(ind_mean), se = se(ind_mean)) %>%
  rename(Block = block) %>%
  ggplot(aes(
    fct_rev(ori_var), mean, group = Block,
    color = Block, shape = Block, linetype = Block
  )) +
  geom_point(position = position_dodge(width = 0.2)) +
  geom_line(position = position_dodge(width = 0.2)) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se), 
    position = position_dodge(width = 0.2),
    width = 0.1, linetype = "solid", show.legend = FALSE
  ) +
  geom_hline(yintercept = 0.5, linetype = "dashed") +
  scale_color_manual(values = c("black", "gray50")) +
  ylim(0.5, 1) +
  labs(x = "Variance of orientations", y = "Accuracy") +
  theme(legend.position = "top")


We conducted two-way repeated ANOVA as Navajas et al. (2017) did. The ANOVA revealed that only the main effect of task difficulty was significant. The main effect of block was not significant.

df_solo_group %>%
  remove_order_pre() %>%
  group_by(id, ori_var, block) %>%
  summarise(accuracy = mean(result == "correct"), .groups = "drop") %>%
  anova_test(
    dv = accuracy,
    within = c(ori_var, block),
    wid = id
  ) %>%
  get_anova_table(correction = "GG")

options(contrasts = c("contr.sum", "contr.poly"))
df_solo_group %>%
  remove_order_pre() %>%
  group_by(id, ori_var, block) %>%
  summarise(accuracy = mean(result == "correct"), .groups = "drop") %>%
  aov(accuracy ~ ori_var * block + Error(id / (ori_var * block)), data = .) %>%
  eta_squared(ci = .95)


We thus merged the data in both blocks to estimate cognitive parameters as shown below.


Risky decisions

In the solo block, participants chose a risky option more frequently when the task was easier and the reward was larger. Error bars indicate standard errors of the means.

df_solo %>%
  mutate(ori_var = factor(ori_var)) %>%
  group_by(id, ori_var, payoff_risky) %>%
  summarise(ind_mean = mean(choice == "risky")) %>%
  group_by(ori_var, payoff_risky) %>%
  summarise(mean = mean(ind_mean), se = se(ind_mean)) %>%
  ggplot(aes(payoff_risky, mean, color = factor(ori_var))) +
  geom_point() +
  geom_line() +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 10) +
  scale_x_continuous(breaks = seq(500, 1750, 250), labels = scales::comma) +
  scale_color_viridis_d(direction = -1) +
  xlim(500, 1600) +
  labs(
    x = "Reward of the risky option (JPY)",
    y = "Choice rate of the risky option",
    color = "Variance of orientations"
  ) +
  theme(legend.position = "top")


Not surprisingly, a positive correlation was observed between the choice frequency of the risky option in the solo block and \(\rho\), which was estimated from the gambling task.

df_solo %>%
  group_by(id) %>%
  summarise(n_risky = sum(choice == "risky")) %>%
  left_join(median_powutil, by = "id") %>%
  ggplot(aes(rho, n_risky)) +
  geom_smooth(method = lm, formula = y ~ x, se = FALSE) +
  geom_point() + 
  scale_x_continuous(breaks = seq(0.2, 1.4, 0.2)) +
  labs(
    x = "Risk preference (\u03c1)",
    y = "Choice frequency of the risky option"
  )


The positive correlation between the above two variables:

df_solo %>%
  group_by(id) %>%
  summarise(n_risky = sum(choice == "risky")) %>%
  left_join(median_powutil, by = "id") %$%
  cor.test(~ rho + n_risky)
#> 
#>  Pearson's product-moment correlation
#> 
#> data:  rho and n_risky
#> t = 3.6639, df = 61, p-value = 0.0005219
#> alternative hypothesis: true correlation is not equal to 0
#> 95 percent confidence interval:
#>  0.1977427 0.6084413
#> sample estimates:
#>       cor 
#> 0.4247001


Confidence rating

The figure shows mean confidence \(\pm\) SE as a function of the result and the task difficulty. This pattern is similar to the result in Navajas et al. (2017).

df_solo %>%
  mutate(
    ori_var = factor(ori_var),
    result = if_else(result == "correct", "Correct", "Wrong")
  ) %>%
  group_by(id, result, ori_var) %>%
  summarise(ind_mean = mean(rating)) %>%
  group_by(result, ori_var) %>%
  summarise(mean = mean(ind_mean), se = se(ind_mean)) %>%
  rename(Result = result) %>%
  ggplot(aes(
    fct_rev(ori_var), mean, group = Result,
    color = Result, linetype = Result, shape = Result
  )) +
  geom_point() +
  geom_line() +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    width = 0.1, linetype = "solid", show.legend = FALSE
  ) +
  scale_y_continuous(breaks = 1:6, limits = c(1, 6)) +
  scale_color_manual(values = c("red3", "#56B4E9")) +
  labs(x = "Variance of orientations", y = "Confidence rating") +
  theme(legend.position = "top")


Response time

The figure shows the response time as a function of the task difficulty and the correct/wrong responses.

df_solo_group %>%
  remove_order_pre() %>%
  group_by(id, result, ori_var) %>%
  summarise(ind_mean = mean(judge_rt)) %>%
  group_by(result, ori_var) %>%
  summarise(mean = mean(ind_mean), se = se(ind_mean)) %>%
  rename(Result = result) %>%
  ggplot(aes(
    fct_rev(ori_var), mean, group = Result,
    color = Result, linetype = Result, shape = Result
  )) +
  geom_point() +
  geom_line() +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    width = 0.1, linetype = "solid", show.legend = FALSE
  ) +
  scale_color_manual(values = c("red3", "#56B4E9")) +
  labs(x = "Variance of orientations", y = "Response time (secs.)") +
  theme(legend.position = "top")


Parameter estimation

We then estimated perceptual parameters in the task (see Navajas et al., 2017, Nature Human Behaviour).


Model

Our model is based on Navajas et al. (2017). We assume that participants updated the estimate of mean orientation as follows:

\[ \mu_{i} = \lambda\mu_{i-1} + \theta_{i} + \epsilon\theta_{i}\xi_{i} \]

where \(\mu_{i}\) is the estimate of the mean after i samples (\(\mu_{0}=0\)), \(\lambda\) determines the relative weighting of recent versus past samples, \(\theta_{i}\) is the actual orientation of the \(i_{th}\) sample in the sequence, \(\xi_{i}\) is sampled from the standard normal distribution, and \(\epsilon\) is a positive free parameter indicating the strength of the noise.

The multiplicative nature of the noise ensures that the uncertainty in the update of the estimate scales with the magnitude of \(\theta_{i}\). At the end of the sequence, choice is determined by the sign of the final value of the mean (\(\mu_{30}\)): the agent chooses clockwise if \(\mu_{30}\) is positive, and anticlockwise if \(\mu_{30}\) is negative.


Simulation

We first defined the functions for the simulation in ../function/r/my_functions.R.

calc_mu
#> function (theta, lambda, epsilon, id) 
#> {
#>     n_theta <- length(theta)
#>     mu_samples <- numeric(n_theta)
#>     noise <- rnorm(n_theta)
#>     for (i in 1:n_theta) {
#>         if (i == 1) {
#>             mu <- 0
#>         }
#>         else {
#>             mu <- lambda * mu + theta[i] + epsilon * theta[i] * 
#>                 noise[i]
#>         }
#>         mu_samples[i] <- mu
#>     }
#>     list(sim_id = id, order = 0:(n_theta - 1), mu = mu_samples)
#> }
sim_updating_model
#> function (n_theta = 30, min_theta = -13, max_theta = 19, epsilon, 
#>     lambda, n_sim = 1000, seed = 1) 
#> {
#>     set.seed(seed)
#>     theta <- runif(n = n_theta + 1, min = min_theta, max = max_theta)
#>     1:n_sim %>% future_imap_dfr(~calc_mu(theta = theta, lambda = lambda, 
#>         epsilon = epsilon, id = .y), .options = furrr_options(seed = seed))
#> }


For example, assume that a participant have \(\epsilon = 1\) and \(\lambda = 0.95\). If he/she worked on a trial (thetas’ range: -13 ~ +19) 1000 times, the 1000 estimates is simulated as follows (three pathways are highlighted as example):

n_theta <- 30
min_theta <- -13
max_theta <- 19
epsilon <- 1
lambda <- 0.95

sim_updating_model(
  n_theta = n_theta, min_theta = min_theta, max_theta = max_theta,
  epsilon = epsilon, lambda = lambda, n_sim = 1000, seed = 1
) %>%
  ggplot(aes(order, mu, group = sim_id, color = sim_id)) +
  geom_line() +
  gghighlight(
    sim_id <= 3, use_direct_label = FALSE,
    unhighlighted_params = list(size = 0.1)
  ) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_color_viridis_c() +
  labs(x = "Gabor patches", y = "\u03bc") +
  guides(color = "none")


The figure shows 100000 samples of \(\mu_{30}\).

sim_updating_model(
  n_theta = n_theta, min_theta = min_theta, max_theta = max_theta,
  epsilon = epsilon, lambda = lambda, n_sim = 100000, seed = 1
) %>%
  filter(order == 30) %>%
  ggplot(aes(mu)) +
  geom_histogram(binwidth = 10, color = "white") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  labs(x = "\u03bc30", y = "Samples")


The \(\mu_{30}\) converges to the normal distribution. The mean is

\[ \bar{\mu}_{30} = \sum_{i=1}^{30}\lambda^{30-i}\theta_{i}, \]

and the variance is

\[ \sigma^2_{30}=\epsilon^2\sum^{30}_{i=1}\lambda^{2(30-i)}\theta_{i}^2. \]


In the example above, the mean, SD, and \(\Phi\) (cumulative probability of the normal distribution) is:

stat_updating_model(
  n_theta = n_theta, min_theta = min_theta, max_theta = max_theta,
  epsilon = epsilon, lambda = lambda, seed = 1
)
#> $mu
#> [1] 42.24533
#> 
#> $sigma
#> [1] 29.61256
#> 
#> $phi
#> [1] 0.9231526


stat_updating_model() is defined in ../function/r/my_functions.R.


Hierarchical Bayesian modeling

See the Stan code (../function/gabor/stochastic_updating.stan) for more details of the priors.

# Make a data set for the estimation
tmp_df_solo_group <- df_solo_group %>%
  # In the next line, exclude trials in the group block
  # where participants chose individual or majority
  # prior to the stimulus presentation (order == "pre").
  filter(order == "post" | is.na(order)) %>%
  mutate(
    judge  = as.numeric(judge == "clockwise"),
    answer = as.numeric(answer == "clockwise")
  ) %>%
  arrange(id, trial, block)

tmp_df_gabor <- df_gabor %>%
  inner_join(tmp_df_solo_group, by = c("id", "trial", "block")) %>%
  rename(theta = ori) %>%
  arrange(id, trial, block)

datalist_gabor <- tmp_df_solo_group %$%
  list(
    N = nrow(.),
    N_subj = length(levels(factor(id))),
    id = id,
    judge = judge,
    answer = answer,
    theta_var = ori_var,
    theta = tmp_df_gabor %>%
      pull(theta) %>%
      matrix(nrow = 30, ncol = nrow(tmp_df_solo_group))
  )

rm(tmp_df_solo_group, tmp_df_gabor)

# Compile
model_gabor <- cmdstan_model("../function/stan/gabor/stochastic_updating.stan")

# Sampling
set.seed(1)
fit_gabor <- model_gabor$sample(
  datalist_gabor, seed = 1, refresh = 100,
  chains = 4, iter_warmup = 500, iter_sampling = 2000,
  init = function() {list(
    epsilon = runif(63, 1, 2),
    lambda = runif(63, 0.9, 1)
  )}
)

fit_gabor_rstan <- cmdstan2rstan(fit_gabor)


We checked the model diagnostics.

launch_shinystan(fit_gabor_rstan)


Posterior predictive checking

We generated predictive data from 20 draws and compared it with the actual choice data. The posterior predictive samples fit the actual data well.

fit_gabor %>%
  spread_draws(y_pred[id][ori_var]) %>%
  sample_draws(ndraws = 20, seed = 1) %>%
  ungroup() %>%
  mutate(
    n_correct = y_pred,
    draw = factor(.draw),
    ori_var = case_when(
      ori_var == 1 ~ 8,
      ori_var == 2 ~ 16,
      ori_var == 3 ~ 32,
      ori_var == 4 ~ 64
    )
  ) %>%
  ggplot(aes(n_correct, ..density..)) +
  geom_line(aes(group = draw), stat = "density", color = "gray", size = 0.5) +
  geom_line(
    data = df_solo_group %>%
      filter(order == "post" | is.na(order)) %>%
      group_by(id, ori_var) %>%
      summarise(n_correct = sum(result == "correct")),
    stat = "density", color = "#009E73", size = 2
  ) +
  facet_wrap(vars(ori_var)) +
  labs(x = "The number of correct responses", y = "Density") +
  theme_facet +
  theme(legend.position = "top")


Point estimates of the parameters

We first saved the medians of the parameters as median_gabor.

median_gabor <- fit_gabor %>%
  spread_draws(epsilon[id], lambda[id]) %>%
  median_qi() %>%
  select(id, epsilon, lambda) %>%
  mutate(gamma = -epsilon)


The figures show posterior distributions of each parameter. mutate_yaxis_num() is defined in ../function/r/my_functions.R.

Perceptual noise (\(\epsilon\))

fit_gabor %>%
  spread_draws(epsilon[id]) %>%
  left_join(mutate_yaxis_num(median_gabor, column = epsilon), by = "id") %>%
  ggplot(aes(epsilon, factor(yaxis_num))) +
  geom_density_ridges(rel_min_height = 0.01) +
  scale_x_continuous(breaks = seq(0, 7, 0.5)) +
  scale_y_discrete(breaks = c(seq(10, 60, 10))) +
  labs(x = "Perceptual noise (\u03b5)", y = "Participants")


Relative weight for past information (\(\lambda\))

fit_gabor %>%
  spread_draws(lambda[id]) %>%
  left_join(mutate_yaxis_num(median_gabor, column = lambda), by = "id") %>%
  ggplot(aes(lambda, factor(yaxis_num))) +
  geom_vline(xintercept = 1, linetype = "dashed") +
  geom_density_ridges(rel_min_height = 0.01) +
  scale_x_continuous(breaks = seq(0.8, 1.2, 0.05)) +
  scale_y_discrete(breaks = c(seq(10, 60, 10))) +
  labs(x = "Relative weight for past information (\u03bb)", y = "Participants")


Summary stats of \(\gamma\)

By definition, \(\epsilon\) captures the strength of the participant’s perceptual noise. We thus decided to use the minus of \(\epsilon\) as the competence parameter. Hereafter, we refer this parameter as \(\gamma\).

library(WRS2)
median_gabor %>%
  summarise(
    mean_gamma = mean(gamma),
    median_gamma = median(gamma),
    sd_gamma = sd(gamma),
    min_gamma = min(gamma),
    max_gamma = max(gamma)
  )


The figure shows that \(\gamma\) reflects participants’ performance in the task.

df_solo_group %>%
  remove_order_pre() %>%
  group_by(id) %>%
  summarise(mean = mean(result == "correct")) %>%
  left_join(median_gabor, by = "id") %>%
  ggplot(aes(gamma, mean)) +
  geom_smooth(method = lm, formula = y ~ x, se = FALSE) +
  geom_point() +
  labs(x = "Competence (\u03b3)", y = "Individual accuracy")


The positive correlation between the above two variables:

df_solo_group %>%
  remove_order_pre() %>%
  group_by(id) %>%
  summarise(mean = mean(result == "correct")) %>%
  left_join(median_gabor, by = "id") %$%
  pbcor(x = lambda, y = mean)
#> Call:
#> pbcor(x = lambda, y = mean)
#> 
#> Robust correlation coefficient: 0.4139
#> Test statistic: 3.5513
#> p-value: 0.00075
df_solo_group %>%
  remove_order_pre() %>%
  group_by(id) %>%
  summarise(mean = mean(result == "correct")) %>%
  left_join(median_gabor, by = "id") %$%
  cor.test(~ gamma + lambda, method = "s")
#> 
#>  Spearman's rank correlation rho
#> 
#> data:  gamma and lambda
#> S = 35326, p-value = 0.2333
#> alternative hypothesis: true rho is not equal to 0
#> sample estimates:
#>       rho 
#> 0.1521217
median_gabor %>%
  ggplot(aes(gamma, lambda)) +
  geom_point()